// Start from a clean session: nothing in memory and no paging of output.
clear
set more off
// capture swallows the error this raises when no log is currently open.
cap log cl

// --------------------------------
//
//  CPS May extracts 77-82
//
// --------------------------------

// Stack the yearly files cpsmay77.dta ... cpsmay82.dta into a single dataset.
// Each pass loads one year as the master data, appends everything stacked so
// far (held in the tempfile), and writes the result back to the tempfile.
// After the loop the full stack is the dataset in memory.
tempfile temp

forval x = 77/82 {

	// Quoted so that a ${raw} path containing spaces still resolves.
	use "${raw}/cps/cpsmay`x'.dta", clear

	// The accumulator does not exist until the first year has been saved, so
	// only that pass skips the append. The append is deliberately NOT wrapped
	// in capture: a failed append (for example a string/numeric type clash
	// between years) must stop the run, because the save below would otherwise
	// overwrite the accumulator and silently lose every earlier year.
	if `x' > 77 {
		append using "`temp'"
	}
	save "`temp'", replace

}


// From NBER
*by Jean Roth Mon Mar 19 12:53:41 EST 2001
*Please report errors to jroth@nber.org
*Change output file name/location as desired

*run by opening stata, set mem 50m, 
*use cpsmayYY, where YY is the year of interest, do labels 
*rename variables with, for example, rename x1 rectype

// Missing values: the code -99 becomes system missing in every variable.
mvdecode _all, mv(-99=.)

// ---- Record identifiers ----
rename x1 rectype
label variable rectype "Record type"
rename x2 month
label variable month "Month in sample"
rename x3 sample
label variable sample "Sample (A or C)"
rename x4 cluster
label variable cluster "Random Cluster (first ID field)"
rename x5 segment
label variable segment "Segment number (second ID field)"
rename x6 serial
label variable serial "Serial number (third ID field)"

// ---- Geography ----
// x7 and x11 are labelled but stay under their raw names.
label variable x7 "State Codes - 1969-72, 77-84"
rename x8 region
label variable region "Region"
label define region 1 "Northeast" 2 "Midwest" 3 "South" 4 "West"
label values region region
rename x9 state75
label variable state75 "*State Codes - 1975 version"
label variable x11 "*SMSA rankings"

// ---- Interview administration ----
rename x14 lineno
label variable lineno "Line number of respondent"

// Labelled for reference only; these fields are not carried forward.
label variable x10 "Regional office"
label variable x12 "Interviewer check"
label variable x13 "Noninterview cluster"
label variable x15 "Type of interview"
label variable x16 "Date completed"
drop x10 x12 x13 x15 x16
// Labour-force questionnaire items x17-x61.
// Kept (renamed): x17 -> worklw, x26 -> hhnum, x27 -> activitylw,
// x28 -> hourslw, x29 -> usual35. x25 is labelled and kept under its raw name.
// Everything else in this section is labelled and then dropped.
label variable x17 "Did X do any work  last week?"
	rename x17 worklw
label variable x18 "Interviewer check  (hours range)"
label variable x19 "Absent from job or on layoff?"
label variable x20 "Looking for work last 4 weeks?"
label variable x21 "Check (continuing/departing rot.)"
label variable x22 "Sample (A or C)"
label variable x23 "*Land usage (urban/rural, farm)"
label variable x24 "Type of living quarters"
label variable x25 "*SMSA status code (central city)"
label variable x26 "Household number"
rename x26 hhnum
	// x18-x24 is a positional varlist range: it removes every variable stored
	// between x18 and x24 in the dataset's current variable order.
	drop x18-x24
	// Disabled: x26 is retained above as hhnum.
	*drop x26
label variable x27 "Major activity last week"
	rename x27 activitylw
label variable x28 "Hours worked last week all jobs 1-99"
	rename x28 hourslw
label variable x29 "Usually work 35+ hours this job"
	rename x29 usual35
label variable x30 "*Reason less than 35 hours worked"
label variable x31 "Why absent from work"
label variable x32 "Getting wages for time off"
// NOTE(review): x33 carries the same label text as x29 above - confirm against the codebook.
label variable x33 "Usually work 35+ hours this job"
label variable x34 "What doing to find work  #1"
label variable x35 "What doing to find work  #2"
label variable x36 "What doing to find work  #3"
label variable x37 "What doing to find work  #4"
label variable x38 "What doing to find work  #5"
label variable x39 "What doing to find work  #6"
label variable x40 "What doing to find work  #7"
label variable x41 "Why started looking for work"
label variable x42 "Weeks unemployed"
label variable x43 "Looking for full or part-time"
label variable x44 "Reason couldn't take job? (Y/N)"
label variable x45 "(lists reason)"
label variable x46 "When last worked full-time"
label variable x47 "When last worked for pay"
label variable x48 "Why left that job?"
label variable x49 "Wants regular job now?"
label variable x50 "Why not looking for work #1"
label variable x51 "Reason not looking       #2"
label variable x52 "Reason not looking #3"
label variable x53 "Reason not looking #4"
label variable x54 "Reason not looking #5"
label variable x55 "Reason not looking #6"
label variable x56 "Reason not looking #7"
label variable x57 "Reason not looking #8"
label variable x58 "Reason not looking #9"
label variable x59 "Reason not looking #10"
label variable x60 "Reason not looking #11"
label variable x61 "Intends to look in next 12 months?"
	// Positional range again: drops everything stored from x30 through x61.
	drop x30-x61
// Class of worker, industry, occupation and basic demographics (x62-x71).

// Class of worker: codes 1-4 are labelled; code 5 is set to missing.
label variable x62 "*Class of worker"
	rename x62 class
	label define class 1 "Private" 2 "Government" 3 "Self-employed" 4 "Without pay"
	label values class class
	replace class=. if class==5

// Industry and occupation codes.
// NOTE(review): the "70" suffix presumably refers to the 1970 Census
// classification - confirm against the codebook.
label variable x63 "*Industry"
rename x63 ind70
label variable x64 "*Occupation"
rename x64 occ70

// NOTE(review): x65 is labelled "Line number" but stored as hhid, while x26
// ("Household number") is stored as hhnum elsewhere in this file - confirm the
// name is intended before using hhid as a household key.
label variable x65 "Line number"
rename x65 hhid
label variable x66 "Relationship to head of household"
	*drop x65 x66

label variable x67 "Age"
	rename x67 age

// Marital status collapsed to a 0/1 indicator. x68 is labelled before the
// recode so the generated variable's automatic label picks the text up.
label variable x68 "Marital status"
	recode x68 (1/3=1 "married") (4/5=0 "not married"), gen(married)
	drop x68

label variable x69 "Race"
	rename x69 race
	lab def racelbl ///
		1 "White" ///
		2 "Black" ///
		3 "other" 
	lab val race racelbl	

// Female indicator. A bare `sex==2` evaluates to 0 when sex is missing, which
// would classify unknown sex as male; the if-qualifier leaves female missing
// in that case, matching how hispanic treats missing ethnicity in this file.
label variable x70 "Sex"
rename x70 sex
gen female = sex==2 if !missing(sex)
drop sex

// Veteran status collapsed to a 0/1 indicator.
label variable x71 "Veteran status"
	recode x71 (1/5=1 "Vet" ) (6=0 "NonVet"), gen(Veteran)
	drop x71

/* Education variables */

// Highest grade attended, shifted down by one from the raw code.
rename x72 gradeat
label variable gradeat "Highest grade attended"
replace gradeat = gradeat - 1

// Completion flag: 1 = completed the grade attended, 2 = did not.
rename x73 gradecp
label variable gradecp "Grade completed?"

// Years of school: the grade attended when it was completed, one less when it
// was not, and zero whenever the grade attended is zero. Any other
// combination is left missing.
gen yearsch = cond(gradeat == 0, 0, ///
	cond(gradecp == 1, gradeat, ///
	cond(gradecp == 2, gradeat - 1, .)))
forvalues k = 1/2 {
	gen yearsch_`k' = yearsch
}

// Degree completed, in five groups:
//   1 = below a completed 12th grade     2 = completed 12th grade
//   3 = grades 13-15, or 16 not completed
//   4 = completed 16, or 17 ("completed 4 or 5 years college")
//   5 = 18 and above
gen byte educ = .
replace educ = 1 if inrange(gradeat, 1, 11) | (gradeat == 12 & gradecp == 2)
replace educ = 2 if gradeat == 12 & gradecp == 1
replace educ = 3 if inrange(gradeat, 13, 15) | (gradeat == 16 & gradecp == 2)
replace educ = 4 if (gradeat == 16 & gradecp == 1) | gradeat == 17
replace educ = 5 if gradeat >= 18 & gradeat != .
			
			
	
		
		
// Family number is labelled and discarded.
label variable x74 "Family number for subdivided hhld"
drop x74

// Three-way labour-force status built from the employment status recode.
// x75 is labelled first so the recode's automatic variable label uses it.
label variable x75 "Employment status recode"
recode x75 (1/2=1 "Employed") (3 = 2 "Unemployed") (4/7 = 3 "Not in LF"), gen(lfstat)

// One byte indicator per lfstat category (1 = employed, 2 = unemployed,
// 3 = not in labour force); each is left missing where lfstat is ".".
local code = 0
foreach flag in empl unem nilf {
	local ++code
	gen byte `flag' = (lfstat == `code') if lfstat != .
	notes `flag': CPS: derived from a-lfsr, pemlr
}
label variable empl "Employed"
label variable unem "Unemployed"
label variable nilf "Not in labor force"

// Remaining administrative fields: labelled, then removed together with x75.
label variable x76 "Principal person of hhld?"
label variable x77 "Document count"
label variable x78 "Month"
label variable x79 "Year (last digit)"
drop x75-x79
// Sampling weight: the raw field carries two implied decimals.
rename x80 weight
label variable weight "Weight (2 implied  decimals)"
replace weight = weight/100

label variable x81 "Errors charged to   enumerator"
label variable x82 "Type of PSU (self  representing?)"
label variable x83 "Incidence of poverty in area"
label variable x84 "SMSA size"

// Hispanic indicator straight from the ethnicity field: 1 for codes 1-7,
// missing for code 9 or ".", 0 otherwise. The ethnicity field itself is not kept.
label variable x85 "Ethnicity"
gen hispanic = inrange(x85, 1, 7)
replace hispanic = . if x85 == 9 | x85 == .
drop x85

// Recodes that are labelled and dropped; x89 stays under its raw name.
label variable x86 "Age recode"
label variable x87 "Residence recode (always missing)"
label variable x88 "Race recode"
label variable x89 "Area recode"
label variable x90 "Poverty area code"
label variable x91 "Part-time status recode"
label variable x92 "Race-sex recode"
label variable x93 "Agricultural wage and salary?"
drop x81-x88
drop x90-x93

// Civilian indicator: code 3 -> 1, code 4 -> 0. x94 is labelled before the
// recode so the generated variable's automatic label picks the text up.
label variable x94 "Civilian labor force status"
recode x94 (3=1 "Civilian") (4=0), gen(Civilian)
drop x94

// Recode and supplement fields x95-x183.
// Only x117 (-> WhiteCollar) and x118 (-> BlueCollar) are kept from this
// section; every other field is labelled for reference and then dropped.
label variable x95 "Full/part-time status"
label variable x96 "Experienced labor force status"
label variable x97 "Household relationship recode"
label variable x98 "Employed class of worker"
label variable x99 "Major occupation group"
label variable x100 "Labor force by time worked"
label variable x101 "Duration of unemployment"
label variable x102 "In civilian labor force?"
label variable x103 "Unemployed?"
label variable x104 "Unemployed 15+ weeks?"
label variable x105 "Other NILF?"
label variable x106 "Full time labor force?"
label variable x107 "Looking for full-time work?"
label variable x108 "Wage and salary worker?"
label variable x109 "Employed person?"
label variable x110 "Employed(nonfarm, non-hhld work)?"
label variable x111 "Experienced labor force?"
label variable x112 "Full-time experienced labor force?"
label variable x113 "Full-time or economic-part-time?"
label variable x114 "Nonfarm industry?"
label variable x115 "Nonfarm wage and salary?"
label variable x116 "Agriculture?"
	// Positional varlist range: drops everything stored from x95 through x116.
	drop x95-x116
label variable x117 "White collar?"
	rename x117 WhiteCollar
label variable x118 "Blue collar?"
	rename x118 BlueCollar
label variable x119 "Manufacturing, wage and salary?"
label variable x120 "Private wage and salary?"
label variable x121 "Part-time for noneconomic reasons?"
label variable x122 "Seeking full time work?"
label variable x123 "Unemployed-no previous experience?"
label variable x124 "Full-time labor force recode"
label variable x125 "Program signal"
label variable x126 "Program signal"
label variable x127 "Age-school recode"
label variable x128 "Age recode"
label variable x129 "Age-major activity recode"
label variable x130 "Age recode"
label variable x131 "Employed status-farm recode"
label variable x132 "Marital status-age recode"
label variable x133 "Marital status-activity recode"
label variable x134 "*Major industry"
label variable x135 "Detailed class of worker"
label variable x136 "Class-employed recode"
label variable x137 "*Major industry"
label variable x138 "*Detailed industry"
label variable x139 "*Major occupation"
label variable x140 "*Detailed occupation"
label variable x141 "*Manufacturing industries"
label variable x142 "Reason not working-hours recode"
label variable x143 "Reason part time-hours recode"
label variable x144 "Detailed reason-hours recode"
// NOTE(review): x145 (collective agreement coverage) and x169 (usual weekly
// earnings) are dropped with the rest of this range; the union and earnings
// variables used later in this file come from x189 and x186 instead.
label variable x145 "Covered by collective agreement"
label variable x146 "Reason-pay status recode"
label variable x147 "Program signal  so"
label variable x148 "Gross Change employment-industry"
label variable x149 "G.C. expanded employment status"
label variable x150 "G.C. intermed. emp. status"
label variable x151 "G.C. industry"
label variable x152 "G.C. employment-occupation"
label variable x153 "G.C. age"
label variable x154 "G.C. summary age"
label variable x155 "G.C. duration of unemployment"
label variable x156 "G.C. summary duration of unemp."
label variable x157 "G.C. duration by full-part time"
label variable x158 "G.C. employment and NILF"
label variable x159 "G.C. age-employment"
label variable x160 "G.C. age-employment (restricted)"
label variable x161 "G.C. education-employment"
label variable x162 "G.C. class-farm"
label variable x163 "G.C. industry"
label variable x164 "G.C. hours-at work"
label variable x165 "G.C. full/part reason"
label variable x166 "G.C. looking full/part - age"
label variable x167 "Number under 18, related to head"
label variable x168 "Total family income"
label variable x169 "Usual weekly earnings"
label variable x170 "Work for 2+ employers?"
label variable x171 "Operate own business?"
label variable x172 "Have other job (not worked)?"
label variable x173 "Check (40 or more hours)"
label variable x174 "Get higher pay for over 40 hours?"
label variable x175 "Usually work over 40 hours?"
label variable x176 "Did X also work regular job?"
label variable x177 "Another job, not worked?"
label variable x178 "Another job, not worked? (recode)"
label variable x179 "Was second job same?"
label variable x180 "Reason worked second job"
label variable x181 "Hours worked second job"
label variable x182 "Hours worked principal job"
label variable x183 "Check/recode"
// Positional varlist range: drops everything stored from x119 through x183.
drop x119-x183
label variable x184 "Days per week usually works (code)"

// Usual weekly hours; where that is ".", fall back to hours worked last week
// whenever the latter is available.
rename x185 hours
label variable hours "Hours per week usually works"
replace hours = hourslw if hours == . & !missing(hourslw)

// Usual weekly earnings.
rename x186 uearnwk
label variable uearnwk "Usually weekly earnings"

// Paid-by-the-hour indicator: 1 when the raw field equals 0, 0 otherwise.
label variable x187 "Paid by the hour?"
gen paidhre = x187 == 0
label variable paidhre "Paid by the hour?"

// Hourly earnings converted from cents.
label variable x188 "Earnings per hour (cents)"
gen uearnhr = x188/100
label variable uearnhr "hourly wage"

// Union membership: raw code 0 -> 1 (Union), raw code 1 -> 0 (Not union).
// x189 is labelled before the recode so the generated variable's automatic
// label picks the text up.
label variable x189 "Belong to labor union?"
recode x189 (0=1 "Union") (1=0 "Not union"), gen(union)

// Raw inputs are no longer needed once the derived variables exist.
drop x184 x187 x188 x189

// Secondary-job and work-schedule fields: labelled, then dropped as a range.
label variable x190 "Who reported income data?"
label variable x191 "Second industry recode"
label variable x192 "Second occupation recode"
label variable x193 "Secondary class of worker"
label variable x194 "Dual job/unpaid job recode"
label variable x195 "Time of day begins work"
label variable x196 "AM/PM begins"
label variable x197 "Time of day ends work"
label variable x198 "AM/PM ends"
label variable x199 "Rotation group 3/other?"
drop x190-x199

// Survey year.
rename x200 year
label variable year "Year"

	
// ---------- Sample ------------
// Keep ages above 15. Missing values compare greater than every number in
// Stata, so a bare `age>15` would also keep observations with missing age;
// the explicit !missing() excludes them.
keep if age>15 & !missing(age)

/* Potential Experience */

// Experience = age - years of school - 6, capped at age - 16 and floored at 0.
// max() and min() skip missing arguments, so without the if-qualifier a
// missing yearsch would silently produce max(age-16, 0) rather than a missing
// value. Experience is left missing when either input is missing.
gen exp = max(min(age-yearsch-6, age-16),0) if !missing(age, yearsch)
gen pexp=exp
gen pexp2=pexp^2

// ----------- Wages ------------

* Top code
// Weekly earnings equal to 999 are flagged and then scaled by 1.4.
gen topcode=uearnwk==999
replace uearnwk = uearnwk*1.4 if uearnwk==999

* hourly wages
// Workers not flagged as paid hourly: weekly earnings divided by weekly hours.
// Workers paid hourly: the reported hourly rate, when it is available.
gen wage = uearnwk/hours if paidhre==0
replace wage = uearnhr if paidhre==1&!missing(uearnhr)

******** Bring in price level data **************
// NOTE(review): cpi, ppi, pce and cpi79 are used below but never created in
// this file, so they are presumably created by this do-file - confirm.
do "${cps}/programs/price_indices.do"

// ------------- Wage Restrictions -----------------
// Full-time flag: 35 or more non-missing weekly hours.
gen FT = inrange(hours, 35, .)
// Earnings sample: class of worker 1 or 2, employed, positive non-missing hours.
gen earnsamp = inlist(class, 1, 2) & lfstat == 1 & hours > 0 & !missing(hours)

* trimming based on Lemieux 2006
// Keep an untrimmed copy, then blank the wage when wage*cpi79 falls outside
// [1, 100] or the observation is outside the earnings sample.
gen rawwage = wage
replace wage = . if !inrange(wage*cpi79, 1, 100) | !earnsamp

// Deflated untrimmed hourly wage and its log.
gen rhrw_cpi = rawwage*cpi
gen lnrhrw_cpi = ln(rhrw_cpi)

// Deflated hourly wages first, then weekly wages (hourly wage times hours),
// so the new variables are created in the same order as before.
foreach deflator in cpi ppi pce {
	gen hrw_`deflator' = wage*`deflator'
}
foreach deflator in cpi ppi pce {
	gen wkw_`deflator' = hrw_`deflator'*hours
}

// Logs of hourly and weekly wages, plus weekly wages of full-time workers only.
foreach deflator in cpi pce ppi {
	gen lnhrw_`deflator' = ln(hrw_`deflator')
	gen lnwkw_`deflator' = ln(wkw_`deflator')
	gen lnftw_`deflator' = ln(wkw_`deflator') if FT == 1
}

// Weights: the sampling weight is stored as wgt, with a copy in fwt;
// lswt scales it by weekly hours relative to 35.
rename weight wgt
gen lswt = wgt*(hours/35)
gen fwt = wgt

// Allocation flag: set to 0 for every observation.
gen alloc = 0

// Aggregate industry from nber: ind70 codes collapsed into groups.
// Codes that match no rule keep their original value. No rule maps to group 17.
gen dind = ind70
recode dind ///
	(18/19 = 1) ///
	(17 = 2) ///
	(47/57 = 3) ///
	(67/77 = 4) ///
	(107/109 = 5) ///
	(118 = 6) ///
	(119/138 = 7) ///
	(139/149 = 8) ///
	(157/168 258 = 9) ///
	(169 = 10) ///
	(177/198 = 11) ///
	(199/209 = 12) ///
	(219 = 13) ///
	(227 = 14) ///
	(228/238 = 15) ///
	(239/257 = 16) ///
	(259 = 18) ///
	(268/298 = 19) ///
	(299 = 20) ///
	(307/318 = 21) ///
	(319/327 = 22) ///
	(328/337 = 23) ///
	(338/339 = 24) ///
	(347/369 = 25) ///
	(377/378 = 26) ///
	(379/387 = 27) ///
	(388/398 = 28) ///
	(407/429 = 29) ///
	(447/449 = 30) ///
	(467/479 = 31) ///
	(507/588 = 32) ///
	(607/698 = 33) ///
	(707/709 = 34) ///
	(717/718 = 35) ///
	(769 = 36) ///
	(727/748 = 37) ///
	(749/759 = 38) ///
	(777/798 = 39) ///
	(807/809 = 40) ///
	(838 = 41) ///
	(828/837 839/848 = 42) ///
	(877/879 = 44) ///
	(857/868 = 43) ///
	(849 869 887/897 = 45) ///
	(027 028 = 46) ///
	(907/937 = 52)

//  ------------ Industry recode ---------
// Everything in this section builds industry variables (ind80 / ind90), not
// occupation variables.
// Disabled alternative that targeted ind80 through ${CPSclean} do-files:
/*
g ind80=.
do "${CPSclean}\CW_ind70_ind80.do"
g ind_80=ind80
do "${CPSclean}\ind_80_des_crosswalk.do"
do "${CPSclean}\ind_des_major_recodes.do"
do "${CPSclean}\ind_des_labels.do"
*/

// ind90 starts out missing for every observation.
// NOTE(review): the crosswalk do-file presumably fills ind90 from ind70 - confirm.
g ind90=.
do "${cps}/programs/CW_ind70_ind90.do"
// ind_90 is a copy of ind90 taken before the three do-files below are run.
g ind_90 = ind90 
do "${cps}/programs/ind_90_des_crosswalk.do"
do "${cps}/programs/ind_des_major_recodes.do"
do "${cps}/programs/ind_des_labels.do"

// Union membership is carried forward as union_mem; the original variable is removed.
g union_mem=union
drop union

// Black indicator. A bare comparison would code a missing race as 0
// (non-black); the if-qualifier leaves black missing when race is unknown,
// matching how hispanic treats missing ethnicity in this file.
g black = race==2 if !missing(race)

// Stamp the dataset with the run date, order it by year and write it out.
// The path is quoted so that a ${wd} containing spaces still resolves.
lab data "extracted on `c(current_date)'"
sort year
save "${wd}/cps/CPSMay.dta", replace
